########################################################
# Andreas Erwin Murr (andreas.murr@politics.ox.ac.uk)
# "The Party Leadership Model: An Early Forecast of 
# the 2015 British General Election"
# Research & Politics replication materials
# Requires: Leader.RData, Polls1966.csv, Polls1979.csv,
#           Polls1983.csv, Polls1987.csv, Polls1992.csv,
#           Polls1997.csv, Polls2001.csv (datasets)
# 15 November 2014
########################################################

# load leader data set	
	
	load("Leader.RData")

# ===========
# = table 1 =
# ===========

	Leader[c("date.gen", "date.con", "ball.con", "name.ldr.con", "name.ctn.con", "vote.ldr.con", "vote.ctn.con", "pop.con")]
	Leader[c("date.gen", "date.lab", "ball.lab", "name.ldr.lab", "name.ctn.lab", "vote.ldr.lab", "vote.ctn.lab", "pop.lab")]

# ===========
# = table 2 =
# ===========

	Leader$win <- with(Leader, ifelse(seat.con>seat.lab, "CON", "LAB"))
	Leader$inc <- c("LAB", Leader$win[-13])
	Leader$pre <- with(Leader, ifelse(pop.con>pop.lab, "CON", "LAB"))
	Leader$date.gen==Leader$date.gen.lag

	Leader[c("date.gen", "inc", "pop.con", "pop.lab", "pre", "win")]
  
# ===========
# = table 3 =
# ===========

  Leader$date.for.na <- Leader$date.for
	Leader$date.for.na[Leader$pos==0] <- NA
	Leader$lead <- with(Leader, date.gen-date.for)
	Leader$lead[Leader$pos==0] <- NA

  Leader[c("date.con", "date.lab", "date.for.na", "date.gen", "lead")]	
	mean(Leader$lead, na.rm=TRUE)

# ===========================================
# = poll of polls vs party leadership model =
# ===========================================

# set date class for loading data sets

	setClass("myDatePolls")
	setAs("character","myDatePolls", function(from){as.Date(from, format="%d/%m/%Y")})

# load polling data

	Polls1966 <- read.csv("Polls1966.csv", header=TRUE, colClasses=c("factor","myDatePolls",rep("numeric", 3)))

	Polls1979 <- read.csv("Polls1979.csv", header=TRUE, colClasses=c("factor","myDatePolls",rep("numeric", 2)))
	Polls1983 <- read.csv("Polls1983.csv", header=TRUE, colClasses=c("factor","myDatePolls",rep("numeric", 2)))
	Polls1987 <- read.csv("Polls1987.csv", header=TRUE, colClasses=c("factor","myDatePolls",rep("numeric", 2)))
	Polls1992 <- read.csv("Polls1992.csv", header=TRUE, colClasses=c("factor","myDatePolls",rep("numeric", 2)))
	Polls1997 <- read.csv("Polls1997.csv", header=TRUE, colClasses=c("factor","myDatePolls",rep("numeric", 2)))
	Polls2001 <- read.csv("Polls2001.csv", header=TRUE, colClasses=c("factor","myDatePolls",rep("numeric", 2)))
	Polls2010 <- read.csv("Polls2010.csv", header=TRUE, colClasses=c("factor","myDatePolls",rep("numeric", 2)))
	
	Polls1979$dayna <- 0
	Polls1983$dayna <- 0
	Polls1987$dayna <- 0
	Polls1992$dayna <- 0
	Polls1997$dayna <- 0
	Polls2001$dayna <- 0
	Polls2010$dayna <- 0

# combine polling data

	Polls <- rbind(Polls1966, Polls1979, Polls1983, Polls1987, Polls1992, Polls1997, Polls2001, Polls2010) 
	
# does the mean of the polling margin predict the election outcome?

	MeanConLead <- function(survey.date, polls.data, forecast.date, previous.election.date, days){
		n <- length(forecast.date)
		lead.con <- rep(NA, n)
		n.polls <- rep(NA, n)
		for (i in 1:n){
			# identify polls before forecast date
			before <- (survey.date < forecast.date[i]) & (survey.date >= forecast.date[i] - days) & (survey.date > previous.election.date[i])
			# select polls before forecast date
			Select <- polls.data[before,]
			# calculate mean conservative lead
			lead.con[i] <- with(Select, mean(con - lab))
			# count number of polls
			n.polls[i] <- dim(Select)[1]
		}
		rbind(lead.con, n.polls)
	}
	forecast.date <- na.omit(Leader$date.for[(Leader$new.ldr.con==1|Leader$new.ldr.lab==1)  & Leader$pos==1])[-c(8)]
	previous.election.date <- Leader$date.gen.lag[Leader$date.for %in% forecast.date]
	polls.data <- Polls	

	Sel <- Leader[Leader$date.for%in%forecast.date & Leader$pos==1 & (Leader$new.ldr.con!=0 | Leader$new.ldr.lab!=0),]
	poll1 <- MeanConLead(Polls$surveydate, polls.data, forecast.date, previous.election.date, 1*28)
	poll2 <- MeanConLead(Polls$surveydate, polls.data, forecast.date, previous.election.date, 2*28)
	poll4 <- MeanConLead(Polls$surveydate, polls.data, forecast.date, previous.election.date, 4*28)
	poll8 <- MeanConLead(Polls$surveydate, polls.data, forecast.date, previous.election.date, 8*28)
	poll16 <- MeanConLead(Polls$surveydate, polls.data, forecast.date, previous.election.date, 16*28)
	poll32 <- MeanConLead(Polls$surveydate, polls.data, forecast.date, previous.election.date, 32*28)
	poll48 <- MeanConLead(Polls$surveydate, polls.data, forecast.date, previous.election.date, 48*28)	
	forecast.leader <- with(Sel, pop.con>pop.lab)
	result <- with(Sel, seat.con>seat.lab)
  acu.leader <- forecast.leader==result
  acu.poll1 <- (poll1[1,]>0)==result
  acu.poll2 <- (poll2[1,]>0)==result
  acu.poll4 <- (poll4[1,]>0)==result
  acu.poll8 <- (poll8[1,]>0)==result
  acu.poll16 <- (poll16[1,]>0)==result
  acu.poll32 <- (poll32[1,]>0)==result
  acu.poll48 <- (poll48[1,]>0)==result
	
	res <- rbind( 
		c(acu.poll1, mean(acu.poll1)), c(poll1[2,], mean(poll1[2,])), 
		c(acu.poll2, mean(acu.poll2)), c(poll2[2,], mean(poll2[2,])),
		c(acu.poll4, mean(acu.poll4)), c(poll4[2,], mean(poll4[2,])),
		c(acu.poll8, mean(acu.poll8)), c(poll8[2,], mean(poll8[2,])),
		c(acu.poll16, mean(acu.poll16)), c(poll16[2,], mean(poll16[2,])),
		c(acu.poll32, mean(acu.poll32)), c(poll32[2,], mean(poll32[2,])),
		c(acu.poll48, mean(acu.poll48)), c(poll48[2,], mean(poll48[2,])),
		c(acu.leader, mean(acu.leader)), rep(2, 8))

# ===========
# = table 4 =
# ===========
		
	library(xtable)
	xtable(res, digits=c(rep(0, 8), 2))

# ===========
# = table 5 =
# ===========

	Leader$better <- with(Leader, ifelse(inc=="CON", pop.con>pop.lab, pop.con<pop.lab))
	Leader$reelected <- with(Leader, inc==win)
		
	na.omit(with(Leader, date.gen[reelected==TRUE & better==FALSE & pos==1]))
	na.omit(with(Leader, date.gen[reelected==FALSE & better==FALSE & pos==1]))
	na.omit(with(Leader, date.gen[reelected==TRUE & better==TRUE & pos==1]))
	na.omit(with(Leader, date.gen[reelected==FALSE & better==TRUE & pos==1]))

	with(Leader, table(reelected, better))

# =====================
# = bayesian analysis =
# =====================

# load packages

	library(rjags)
	library(superdiag)

# bayesian 2-by-2 table analysis
  
  modelString <- "model {
  # uniform beta(1,1) prior on both
    theta0 ~ dbeta(2, 4)
    theta1 ~ dbeta(6, 2)  
  }"
  write(modelString, "model.txt")

  parameters <- c("theta0", "theta1" ) # The parameter(s) to be monitored.
  adaptSteps <- 1000000 # Number of steps to "tune" the samplers.
  nChains <- 10 # Number of chains to run.
  nIter <- 1000000 # Steps per chain.
  # Create, initialize, and adapt the model:
  jagsModel <- jags.model("model.txt", n.chains = nChains, n.adapt = adaptSteps)

  codaSamples <- coda.samples(jagsModel, variable.names = parameters, n.iter = nIter)

# diagnostics
	
  mcmcChain <- as.matrix(codaSamples)
	
  superdiag(codaSamples, burnin=0)

# compute quantities of interest

  theta0Sample <- mcmcChain[,"theta0"]
  theta1Sample <- mcmcChain[,"theta1"]

	mean(theta0Sample) # chance of a re-election of david cameron if labour elects a more popular leader
	mean(theta1Sample) # chance of re-election if david cameron stays more popular than a labour leader
	
  q <- theta1Sample - theta0Sample
  prop.table(table(q > 0))

# ============
# = figure 1 =
# ============
	
	par(mar=c(3,1,0,1), mgp=c(2,.7,0), tck=-.01, cex=2, las=1, mfrow=c(2,1))
	plot(density(theta0Sample), xlim=c(-.5,1), ylim=c(0,3.5), lwd=3, axes=FALSE, main="", xlab="", ylab="")
	axis(1)	
	lines(density(theta1Sample), lwd=3)
	text(expression(theta[1]), x=5/6, y=3.1, cex=1.5)
	text(expression(theta[0]), x=1/4, y=2.4, cex=1.5)
	plot(density(q), ylim=c(0,3.5), xlim=c(-.5,1), lwd=3, axes=FALSE, main="", xlab="", ylab="")
	axis(1)
	text(expression(theta[1]-theta[0]), x=(5/6)-(1/4), y=2, adj=1.1, cex=1.5)
	axis(1)
	abline(v=0, lty=2, lwd=1.5)
	
# ===================
# = end source code =
# ===================	